package com.amaake;

import com.amaake.Model.Zolurl;
import org.apache.log4j.Logger;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;

import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * \* Created with IntelliJ IDEA.
 * \* User: Amaake
 * \* Date: 2016/10/12 0012
 * \* Time: 2:14
 * \* Description: 春眠不觉晓，起来敲代码。
 * \
 */
public class ZolupurlPageProcesser implements PageProcessor {

    private static Logger log = Logger.getLogger(ZolupurlPageProcesser.class);

    private int type = 0;
    private int pagebar = 1;
    private int pagebars = 0;
    private String url = "";

    public ZolupurlPageProcesser(int type){
        log.info("识别type为"+type+"的爬虫更新链接开始启动------");
        this.type = type;
    }

    public void process(Page page) {
        log.info("第"+pagebar+"个url开始------"+page.getUrl());
        pagebar++;
        if(url.equals("")){
            log.info("当前顶级url为------"+page.getUrl());
            url = page.getUrl().toString();
        }
        if(pagebars==0){
            String page1 = page.getHtml().$("span.small-page-active").toString();
            String[] p = page1.split("</b>");
            pagebars = Integer.parseInt(p[1].substring(1,p[1].length()-7));
            log.info("当前总分页为------"+pagebars);
        }
        Zolurl zurl = null;
        List<Selectable> list =  page.getHtml().xpath("//div[@class='list-item']").nodes();
        for(Selectable l : list){
            String title = l.xpath("//div[@class='pro-intro']/h3/a/text()").toString();
            log.info("获取产品------"+title);
            String img = l.$("img","src").toString();
            log.info("获取产品图片------"+img);
            String link = l.regex("(http://detail.zol.com.cn/\\w+/\\w+/param.shtml)").toString();
            log.info("获取产品连接------"+link);
            zurl = Zolurl.dao.findFirst("select * from zolurl where title='"+title+"'");
            if(zurl==null){
                zurl = new Zolurl();
                zurl.set("url",link);
                zurl.set("title",title);
                zurl.set("img",img);
                zurl.set("type",this.type);
                zurl.save();
            }else{
                zurl.set("url",link);
                zurl.set("title",title);
                zurl.set("img",img);
                zurl.update();
            }
        }

//        List<String> links = page.getHtml().links().regex("(http://detail.zol.com.cn/\\w+/\\w+/param.shtml)").all();

        if(pagebar<pagebars){
            page.addTargetRequest(url+pagebar+".html");
        }

    }

    public Site getSite() {
        return Site.me().setRetryTimes(3).setSleepTime(1000);
    }
}